XGBoost wrapper for Seurat projects in R
source("tianfengRwrappers.R")
library(xgboost)
library(Matrix)
library(mclust)
library(tidyverse)
library(SHAPforxgboost)
# Load the three serialized datasets used by the chunks below. Paths are
# relative to the current working directory.
# NOTE(review): presumably these are Seurat objects -- confirm against the
# code that wrote the .rds files.
ds0 <- readRDS(file = "ds0.rds")
ds1 <- readRDS(file = "ds1.rds")
ds2 <- readRDS(file = "ds2.rds")
## Train a multi-class XGBoost classifier ("multi:softprob") on a Seurat object.
##
## The identities of the object (`Idents(seuobj)`) are used as class labels and
## the matrix returned by `get_data_table()` as features. Columns of that
## matrix are treated as cells: the train/test split is drawn over columns and
## the matrix is transposed before being handed to xgboost.
##
## Arguments:
##   seuobj      Object whose `Idents()` can be converted to numbers
##               (e.g. "0", "1", ...).
##               NOTE(review): xgboost expects labels in 0..(num_class - 1);
##               this is not enforced here.
##   is_highvar  Forwarded to `get_data_table(highvar = ...)`.
##   test_ratio  Fraction of cells held out as evaluation set. Set to 0 to
##               train on the whole dataset without an evaluation set.
##   seed        Seed passed to `set.seed()` so the split is reproducible.
##
## Returns the booster produced by `xgb.train()`. When `test_ratio` is not 0
## its evaluation log holds the per-round "train" and "eval" mlogloss.
XGBoost_train_from_seuobj <- function(seuobj, is_highvar = TRUE, test_ratio = 0.3, seed = 7) {
  if (!is.numeric(test_ratio) || length(test_ratio) != 1 || is.na(test_ratio) ||
        test_ratio < 0 || test_ratio >= 1) {
    stop("test_ratio must be a single number in [0, 1)")
  }
  set.seed(seed)

  # Check valid Idents: every label must convert to a number, not only the
  # first one.
  seuobj_label <- as.numeric(as.character(Idents(seuobj)))
  if (length(seuobj_label) == 0 || anyNA(seuobj_label)) {
    stop("Please ensure that seurat idents are in numeric forms")
  }

  # Honour the caller's `is_highvar` choice (it used to be ignored).
  seuobj_data <- get_data_table(seuobj, highvar = is_highvar, type = "data")
  n_cells <- ncol(seuobj_data)

  xgb_param <- list(
    eta = 0.2, max_depth = 6, subsample = 0.6,
    num_class = length(table(Idents(seuobj))),
    objective = "multi:softprob", eval_metric = "mlogloss"
  )

  if (test_ratio == 0) {
    # Use the whole dataset as training data; no evaluation set is recorded.
    seuobj_train <- xgb.DMatrix(
      data = t(as(seuobj_data, "dgCMatrix")),
      label = seuobj_label
    )
    bst_model <- xgb.train(xgb_param, seuobj_train, nrounds = 100, verbose = 0)
  } else {
    n_test <- ceiling(test_ratio * n_cells)
    if (n_test >= n_cells) {
      stop("test_ratio leaves no cells for training")
    }
    # sample.int(n, size) draws the same indices as sample(1:n, size).
    index <- sample.int(n_cells, n_test)

    # drop = FALSE keeps a matrix even when a split contains a single cell.
    seuobj_test <- xgb.DMatrix(
      data = t(as(seuobj_data[, index, drop = FALSE], "dgCMatrix")),
      label = seuobj_label[index]
    )
    seuobj_train <- xgb.DMatrix(
      data = t(as(seuobj_data[, -index, drop = FALSE], "dgCMatrix")),
      label = seuobj_label[-index]
    )

    watchlist <- list(train = seuobj_train, eval = seuobj_test)
    # The evaluation list is deliberately passed by position (4th formal of
    # xgb.train) exactly as before.
    bst_model <- xgb.train(xgb_param, seuobj_train, nrounds = 100, watchlist, verbose = 0)
  }

  bst_model
}
# saveRDS(bst_model, "ds2_model.rds")
## Plot the evaluation-set multi-class log loss ("eval_mlogloss") per boosting
## round as an interactive plotly line chart.
##
## The "eval" entry only exists when the model was trained with an evaluation
## set, i.e. with test_ratio != 0 in XGBoost_train_from_seuobj().
##
## Arguments:
##   nrounds  Number of boosting rounds to show; capped at the number of
##            rounds actually present in the evaluation log.
##   model    Trained booster. When NULL (the default) the function falls back
##            to an object named `bst_model` found through lexical scoping,
##            which is what earlier callers relied on.
##
## Returns the plotly object.
show_train_loss <- function(nrounds = 100, model = NULL) {
  if (is.null(model)) {
    model <- bst_model
  }

  eval_loss <- model[["evaluation_log"]][["eval_mlogloss"]]
  if (is.null(eval_loss)) {
    stop("No 'eval_mlogloss' entries in the model's evaluation log; ",
         "train with a non-zero test_ratio first", call. = FALSE)
  }

  # Keep x and y the same length even if `nrounds` differs from the number of
  # logged rounds.
  n_shown <- min(nrounds, length(eval_loss))
  eval_loss <- eval_loss[seq_len(n_shown)]

  # plotly:: qualification prevents `layout` from resolving to
  # graphics::layout when plotly is not attached.
  plotly::plot_ly(data.frame(eval_loss), x = seq_len(n_shown), y = eval_loss) %>%
    plotly::add_trace(
      type = "scatter", mode = "markers+lines",
      marker = list(color = "black", line = list(color = "#1E90FFC7", width = 1)),
      line = list(color = "#1E90FF80", width = 2)
    ) %>%
    plotly::layout(
      xaxis = list(title = "epoch"), yaxis = list(title = "eval_mlogloss"),
      title = "train_loss",
      font = list(family = "Arial", size = 25, color = "black")
    )
}
function instance
predict

supervised vs unsupervised clustering
upset plot
# NOTE(review): presumably a placeholder for the package needed by the upset
# plot -- no package name is given. Called without arguments, library() only
# lists the installed packages and attaches nothing.
library()
Add a new chunk by clicking the Insert Chunk button on the toolbar or by pressing Ctrl+Alt+I.
When you save the notebook, an HTML file containing the code and output will be saved alongside it (click the Preview button or press Ctrl+Shift+K to preview the HTML file).
The preview shows you a rendered HTML copy of the contents of the editor. Consequently, unlike Knit, Preview does not run any R code chunks. Instead, the output of the chunk when it was last run in the editor is displayed.
LS0tCnRpdGxlOiAiWEdCb29zdCB3cmFwcGVyIgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKLS0tCgojIFhHQm9vc3Qgd3JhcHBlciBmb3Igc2V1cmF0IHByb2plY3QgaW4gUgoKYGBge3J9CnNvdXJjZSgidGlhbmZlbmdSd3JhcHBlcnMuUiIpCmxpYnJhcnkoeGdib29zdCkKbGlicmFyeShNYXRyaXgpCmxpYnJhcnkobWNsdXN0KQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShTSEFQZm9yeGdib29zdCkKCmRzMCA8LSByZWFkUkRTKCJkczAucmRzIikKZHMxIDwtIHJlYWRSRFMoImRzMS5yZHMiKQpkczIgPC0gcmVhZFJEUygiZHMyLnJkcyIpCmBgYAoKCmBgYHtyfQpYR0Jvb3N0X3RyYWluX2Zyb21fc2V1b2JqIDwtIGZ1bmN0aW9uKHNldW9iaiwgaXNfaGlnaHZhciA9IFQsIHRlc3RfcmF0aW8gPSAwLjMsIHNlZWQgPSA3KQogICMjIHNldCB0ZXN0X3JhdGlvIHRvIDAgdG8gYXZvaWQgZXh0cmFjdGluZyB0ZXN0IGZyb20gZGF0YXNldAp7CiAgc2V0LnNlZWQoc2VlZCkKICBzZXVvYmpfbGFiZWwgPC0gYXMubnVtZXJpYyhhcy5jaGFyYWN0ZXIoSWRlbnRzKHNldW9iaikpKQogIGlmKGlzLm5hKHNldW9ial9sYWJlbFsxXSkpICMgY2hlY2sgdmFpbGQgSWRlbnRzCiAgewogICAgc3RvcCgiUGxlYXNlIGVuc3VyZSB0aGF0IHNldXJhdCBpZGVudHMgYXJlIGluIG51bWVyaWMgZm9ybXMiKQogIH0KICAjIGNvbG5hbWVzKHNldW9ial9kYXRhKSA8LSBOVUxMCiAgc2V1b2JqX2RhdGEgPC0gZ2V0X2RhdGFfdGFibGUoc2V1b2JqLCBoaWdodmFyID0gVCwgdHlwZSA9ICJkYXRhIikKICB4Z2JfcGFyYW0gPC0gbGlzdChldGEgPSAwLjIsIG1heF9kZXB0aCA9IDYsIAogICAgICAgICAgICAgICAgICAgIHN1YnNhbXBsZSA9IDAuNiwgIG51bV9jbGFzcyA9IGxlbmd0aCh0YWJsZShJZGVudHMoc2V1b2JqKSkpLAogICAgICAgICAgICAgICAgICAgIG9iamVjdGl2ZSA9ICJtdWx0aTpzb2Z0cHJvYiIsIGV2YWxfbWV0cmljID0gJ21sb2dsb3NzJykKICAKICBpZih0ZXN0X3JhdGlvID09IDApIHsKICAgIHNldW9ial90cmFpbl9kYXRhIDwtIGxpc3QoZGF0YSA9IHQoYXMoc2V1b2JqX2RhdGEsImRnQ01hdHJpeCIpKSwgbGFiZWwgPSBzZXVvYmpfbGFiZWwpIAogICAgIyB1c2Ugd2hvbGUgZGF0YXNldCBhcyB0cmFpbiBkYXRhCiAgICBzZXVvYmpfdHJhaW4gPC0geGdiLkRNYXRyaXgoZGF0YSA9IHNldW9ial90cmFpbl9kYXRhJGRhdGEsbGFiZWwgPSBzZXVvYmpfdHJhaW5fZGF0YSRsYWJlbCkKICAgIGJzdF9tb2RlbCA8LSB4Z2IudHJhaW4oeGdiX3BhcmFtLCBzZXVvYmpfdHJhaW4sIG5yb3VuZHMgPSAxMDAsIHZlcmJvc2UgPSAwKQogIH0gZWxzZSB7CiAgICBpbmRleCA8LSBjKDE6ZGltKHNldW9ial9kYXRhKVsyXSkgJT4lIHNhbXBsZShjZWlsaW5nKHRlc3RfcmF0aW8qZGltKHNldW9ial9kYXRhKVsyXSksIHJlcGxhY2UgPSBGLCBwcm9iID0gTlVMTCkKICAgIHNldW9ial90cmFpbl9kYXRhIDwtIGxpc3QoZGF0YSA9IHQoYXMoc2V1b2JqX2RhdGFbLC1pbmRl
eF0sImRnQ01hdHJpeCIpKSwgbGFiZWwgPSBzZXVvYmpfbGFiZWxbLWluZGV4XSkKICAgIHNldW9ial90ZXN0X2RhdGEgPC0gbGlzdChkYXRhID0gdChhcyhzZXVvYmpfZGF0YVssaW5kZXhdLCJkZ0NNYXRyaXgiKSksIGxhYmVsID0gc2V1b2JqX2xhYmVsW2luZGV4XSkKICAgIHNldW9ial90ZXN0IDwtIHhnYi5ETWF0cml4KGRhdGEgPSBzZXVvYmpfdGVzdF9kYXRhJGRhdGEsbGFiZWwgPSBzZXVvYmpfdGVzdF9kYXRhJGxhYmVsKQogICAgc2V1b2JqX3RyYWluIDwtIHhnYi5ETWF0cml4KGRhdGEgPSBzZXVvYmpfdHJhaW5fZGF0YSRkYXRhLGxhYmVsID0gc2V1b2JqX3RyYWluX2RhdGEkbGFiZWwpCiAgICB3YXRjaGxpc3QgPC0gbGlzdCh0cmFpbiA9IHNldW9ial90cmFpbiwgZXZhbCA9IHNldW9ial90ZXN0KQogICAgYnN0X21vZGVsIDwtIHhnYi50cmFpbih4Z2JfcGFyYW0sIHNldW9ial90cmFpbiwgbnJvdW5kcyA9IDEwMCwgd2F0Y2hsaXN0LCB2ZXJib3NlID0gMCkKICB9CiAgcmV0dXJuKGJzdF9tb2RlbCkKfQojIHNhdmVSRFMoYnN0X21vZGVsLCAiZHMyX21vZGVsLnJkcyIpCgpzaG93X3RyYWluX2xvc3MgPC0gZnVuY3Rpb24oYnN0X21vZGVsLCBucm91bmRzID0gMTAwKSAjd2hlbiAkIHRlc3RfcmF0aW8gXG5lcSAwICQgc2hvdyBsb3NzIGluIHdhdGNobGlzdAp7CiAgZXZhbF9sb3NzIDwtIGJzdF9tb2RlbFtbImV2YWx1YXRpb25fbG9nIl1dW1siZXZhbF9tbG9nbG9zcyJdXQogIHBsb3RfbHkoZGF0YS5mcmFtZShldmFsX2xvc3MpLCB4ID0gYygxOm5yb3VuZHMpLCB5ID0gZXZhbF9sb3NzKSAlPiUgCiAgICBhZGRfdHJhY2UodHlwZSA9ICJzY2F0dGVyIiwgbW9kZSA9ICJtYXJrZXJzK2xpbmVzIiwgCiAgICAgICAgICAgICAgbWFya2VyID0gbGlzdChjb2xvciA9ICJibGFjayIsIGxpbmUgPSBsaXN0KGNvbG9yID0gIiMxRTkwRkZDNyIsIHdpZHRoID0gMSkpLAogICAgICAgICAgICAgIGxpbmUgPSBsaXN0KGNvbG9yID0gIiMxRTkwRkY4MCIsIHdpZHRoID0gMikpICU+JSAKICAgIGxheW91dCh4YXhpcyA9IGxpc3QodGl0bGUgPSAiZXBvY2giKSx5YXhpcyA9IGxpc3QodGl0bGUgPSAiZXZhbF9tbG9nbG9zcyIpLCAKICAgICAgICAgICB0aXRsZSA9ICJ0cmFpbl9sb3NzIiwgZm9udCA9IGxpc3QoZmFtaWx5ID0gIkFyaWFsIiwgc2l6ZSA9IDI1LCBjb2xvciA9ICJibGFjayIpKQp9CgpYR0Jvb3N0X3ByZWRpY3RfZnJvbV9zZXVvYmogPC0gZnVuY3Rpb24oc2V1b2JqLCBic3RfbW9kZWwsIGlzX2hpZ2h2YXIgPSBULCBzZWVkID0gNykKICAjcmV0dXJuIGEgdXBkYXRlZCBzZXVyYXQgb2JqZWN0IHdpdGggbmV3IG1ldGFkYXRhIG5hbWVkIGNvbmZpZGVuY2UgYW5kIHByb2plY3RlZF9pZGVudHMgCnsKICBzZXVvYmpfbGFiZWwgPC0gYXMubnVtZXJpYyhhcy5jaGFyYWN0ZXIoSWRlbnRzKHNldW9iaikpKQogIGlmKGlzLm5hKHNldW9ial9sYWJlbFsxXSkpICMgY2hlY2sgdmFpbGQgSWRlbnRzCiAgewogICAgc3RvcCgiUGxlYXNlIGVu
c3VyZSB0aGF0IHNldXJhdCBpZGVudHMgYXJlIGluIG51bWVyaWMgZm9ybXMiKQogIH0KICB0ZW1wIDwtIGdldF9kYXRhX3RhYmxlKHNldW9iaiwgaGlnaHZhciA9IFQsIHR5cGUgPSAiZGF0YSIpCiAgc2V1b2JqX2RhdGEgPC0gbWF0cml4KGRhdGEgPSAwLCBucm93ID0gYnN0X21vZGVsJG5mZWF0dXJlcywgbmNvbCA9IGxlbmd0aChjb2xuYW1lcyh0ZW1wKSksIAogICAgICAgICAgICAgICAgICAgICBieXJvdyA9IEZBTFNFLCBkaW1uYW1lcyA9IGxpc3QoYnN0X21vZGVsW1siZmVhdHVyZV9uYW1lcyJdXSxjb2xuYW1lcyh0ZW1wKSkpCiAgaW50ZXJzZWN0X2ZlYXR1cmVzIDwtIGludGVyc2VjdChic3RfbW9kZWxbWyJmZWF0dXJlX25hbWVzIl1dLCByb3duYW1lcyh0ZW1wKSkKICBzZXVvYmpfZGF0YVtpbnRlcnNlY3RfZmVhdHVyZXMsXSA8LSB0ZW1wW2ludGVyc2VjdF9mZWF0dXJlcyxdCiAgcm0odGVtcCkKICAKCiAgIyBjb2xuYW1lcyhzZXVvYmpfZGF0YSkgPC0gTlVMTAogIHNldW9ial90ZXN0X2RhdGEgPC0gbGlzdChkYXRhID0gdChhcyhzZXVvYmpfZGF0YSwiZGdDTWF0cml4IikpLCBsYWJlbCA9IHNldW9ial9sYWJlbCkKICBzZXVvYmpfdGVzdCA8LSB4Z2IuRE1hdHJpeChkYXRhID0gc2V1b2JqX3Rlc3RfZGF0YSRkYXRhLGxhYmVsID0gc2V1b2JqX3Rlc3RfZGF0YSRsYWJlbCkKICAKICAj6aKE5rWL57uT5p6cCiAgcHJlZGljdF9zZXVvYmpfdGVzdCA8LSBwcmVkaWN0KGJzdF9tb2RlbCwgbmV3ZGF0YSA9IHNldW9ial90ZXN0KQogIAogIHByZWRpY3RfcHJvcF9zZXVvYmogPDwtIG1hdHJpeChkYXRhPXByZWRpY3Rfc2V1b2JqX3Rlc3QsIG5yb3cgPSBic3RfbW9kZWxbWyJwYXJhbXMiXV1bWyJudW1fY2xhc3MiXV0sIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIG5jb2wgPSBuY29sKHNldW9iaiksIGJ5cm93ID0gRkFMU0UsIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgIGRpbW5hbWVzID0gbGlzdChhcy5jaGFyYWN0ZXIoMDooYnN0X21vZGVsW1sicGFyYW1zIl1dW1sibnVtX2NsYXNzIl1dLTEpKSwgY29sbmFtZXMoc2V1b2JqKSkpCgogICMgcHJlZGljdCBjZWxsIHR5cGVzCiAgIyBzZXVvYmpfcmVzIDwtIGFwcGx5KHByZWRpY3RfcHJvcF9zZXVvYmosMixpZGVudF9hc3NpZ25mdW5jLHJvd25hbWVzKHByZWRpY3RfcHJvcF9zZXVvYmopKQogIHNldW9ial9yZXMgPC0gYXBwbHkocHJlZGljdF9wcm9wX3NldW9iaiwyLGlkZW50X2Fzc2lnbmZ1bmMyLHJvd25hbWVzKHByZWRpY3RfcHJvcF9zZXVvYmopKQogIAogIHByaW50KCJBUkkgPSIpCiAgcHJpbnQoYWRqdXN0ZWRSYW5kSW5kZXgoc2V1b2JqX3Jlcywgc2V1b2JqX3Rlc3RfZGF0YSRsYWJlbCkpCiAgCiAgc2V1b2JqIDwtIEFkZE1ldGFEYXRhKHNldW9iaiwgZGF0YS5mcmFtZSh0KHByZWRpY3RfcHJvcF9zZXVvYmopLCBzdHJpbmdzQXNGYWN0b3JzPUYpKQogIHNldW9iaiRwcm9qZWN0ZWRfaWRlbnRzIDwtIGZhY3RvcihzZXVvYmpfcmVzKSAjc2F2ZSBhbmQg
dXBkYXRlIHNldXJhdCBvYmplY3QKICByZXR1cm4oc2V1b2JqKQp9CgojIyBhc3NpZ24gY2VsbCB0eXBlIHByZWRpY3RlZCB2aWEgdHJlZSBtb2RlbHMsIGNvbnNpZGVyIGNvbmZpZGVuY2UKaWRlbnRfYXNzaWduZnVuYyA8LSBmdW5jdGlvbihzLCBpZGVudCkgewogICAgaWYgKG1heChzKSA+IDEuNSAvIGxlbmd0aChpZGVudCkpIHsKICAgICAgICAgIHJldHVybihpZGVudFt3aGljaChzID09IG1heChzKSldKQogICAgICB9IGVsc2UgewogICAgICAgICAgcmV0dXJuKCJ1bmFzc2lnbmVkIikKICAgICAgfQp9CgppZGVudF9hc3NpZ25mdW5jMiA8LSBmdW5jdGlvbihzLCBpZGVudCkgCiMgY29uZmlkZW5jZSA6IG1heCAtIDJ0aF9tYXggPiAwLjUKewogICAgaWYgKG1heChzKSAtIG1heChzW3MhPW1heChzKV0pID4gMC41KSB7CiAgICAgICAgICByZXR1cm4oaWRlbnRbd2hpY2gocyA9PSBtYXgocykpXSkKICAgICAgfSBlbHNlIHsKICAgICAgICAgIHJldHVybigidW5hc3NpZ25lZCIpCiAgICAgIH0KfQoKcHJvamVjdDJyZWZfY2VsbHR5cGUgPC0gZnVuY3Rpb24ocXVlcnlfc2V1b2JqLCByZWZfc2V1b2JqKSAjIGFkZCByZWZfY2VsbHR5cGUgdG8gbWV0YS5kYXRhIGluIHF1ZXJ5IHNldXJhdCBvYmplY3QKewogIGluZGVudG1hcCA8LSBsZXZlbHMocmVmX3NldW9iaiRzZXVyYXRfY2x1c3RlcnMpCiAgbmFtZXMoaW5kZW50bWFwKSA8LSBsZXZlbHMocmVmX3NldW9iaiRDbGFzc2lmaWNhdGlvbjEpCiAgZGYgPC0gcXVlcnlfc2V1b2JqJHByb2plY3RlZF9pZGVudHMKICBsZXZlbHMoZGYpIDwtIGMobmFtZXMoaW5kZW50bWFwKSwidW5hc3NpZ25lZCIpCiAgcXVlcnlfc2V1b2JqJHJlZl9jZWxsdHlwZSA8LSBkZgogIHJldHVybihxdWVyeV9zZXVvYmopCn0KCmBgYAoKLS0tCiMjIGZ1bmN0aW9uIGluc3RhbmNlCiMjIyB0cmFpbgpgYGB7cn0KdW1hcHBsb3QoZHMyLCBncm91cC5ieSA9ICJzZXVyYXRfY2x1c3RlcnMiKQpJZGVudHMoZHMyKSA8LSBkczIkc2V1cmF0X2NsdXN0ZXJzCmJzdF9tb2RlbCA8LSBYR0Jvb3N0X3RyYWluX2Zyb21fc2V1b2JqKGRzMikKc2hvd190cmFpbl9sb3NzKGJzdF9tb2RlbCkKYGBgCgojIyBmdW5jdGlvbiBpbnN0YW5jZQojIyMgcHJlZGljdApgYGB7cn0KZHMxIDwtIFhHQm9vc3RfcHJlZGljdF9mcm9tX3NldW9iaihkczEsIGJzdF9tb2RlbCA9IGJzdF9tb2RlbCkgJT4lIHByb2plY3QycmVmX2NlbGx0eXBlKHJlZl9zZXVvYmogPSBkczIpCnVtYXBwbG90KGRzMSwgZ3JvdXAuYnkgPSAicmVmX2NlbGx0eXBlIikKYGBgCgojIyBzdXBlcnZpc2VkIHZzIHVuc3VwZXJ2aXNlZCBjbHVzdGVyaW5nCiMjIyB1cHNldCBwbG90CmBgYHtyfQpsaWJyYXJ5KCkKYGBgCgoKQWRkIGEgbmV3IGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqSW5zZXJ0IENodW5rKiBidXR0b24gb24gdGhlIHRvb2xiYXIgb3IgYnkgcHJlc3NpbmcgKkN0cmwrQWx0K0kqLgoKV2hlbiB5b3Ugc2F2ZSB0aGUgbm90ZWJvb2ssIGFuIEhUTUwgZmlsZSBjb250
YWluaW5nIHRoZSBjb2RlIGFuZCBvdXRwdXQgd2lsbCBiZSBzYXZlZCBhbG9uZ3NpZGUgaXQgKGNsaWNrIHRoZSAqUHJldmlldyogYnV0dG9uIG9yIHByZXNzICpDdHJsK1NoaWZ0K0sqIHRvIHByZXZpZXcgdGhlIEhUTUwgZmlsZSkuCgpUaGUgcHJldmlldyBzaG93cyB5b3UgYSByZW5kZXJlZCBIVE1MIGNvcHkgb2YgdGhlIGNvbnRlbnRzIG9mIHRoZSBlZGl0b3IuIENvbnNlcXVlbnRseSwgdW5saWtlICpLbml0KiwgKlByZXZpZXcqIGRvZXMgbm90IHJ1biBhbnkgUiBjb2RlIGNodW5rcy4gSW5zdGVhZCwgdGhlIG91dHB1dCBvZiB0aGUgY2h1bmsgd2hlbiBpdCB3YXMgbGFzdCBydW4gaW4gdGhlIGVkaXRvciBpcyBkaXNwbGF5ZWQuCg==